package org.apache.solr.search.similarities;

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

import org.apache.lucene.search.similarities.Distribution;
import org.apache.lucene.search.similarities.DistributionLL;
import org.apache.lucene.search.similarities.DistributionSPL;
import org.apache.lucene.search.similarities.IBSimilarity;
import org.apache.lucene.search.similarities.Lambda;
import org.apache.lucene.search.similarities.LambdaDF;
import org.apache.lucene.search.similarities.LambdaTTF;
import org.apache.lucene.search.similarities.Normalization;
import org.apache.lucene.search.similarities.Similarity;
import org.apache.solr.common.params.SolrParams;
import org.apache.solr.schema.SimilarityFactory;
import org.apache.solr.search.similarities.DFRSimilarityFactory; // javadoc

/**
 * Factory for {@link IBSimilarity}
 * <p>
 * You must specify the implementations for all three components of the
 * Information-Based model (strings).
 * <ol>
 * <li>{@link Distribution distribution}: Probabilistic distribution used to
 * model term occurrence
 * <ul>
 * <li>{@link DistributionLL LL}: Log-logistic</li>
 * <li>{@link DistributionLL SPL}: Smoothed power-law</li>
 * </ul>
 * </li>
 * <li>{@link Lambda lambda}: &lambda;<sub>w</sub> parameter of the probability
 * distribution
 * <ul>
 * <li>{@link LambdaDF DF}: <code>N<sub>w</sub>/N</code> or average number of
 * documents where w occurs</li>
 * <li>{@link LambdaTTF TTF}: <code>F<sub>w</sub>/N</code> or average number of
 * occurrences of w in the collection</li>
 * </ul>
 * </li>
 * <li>{@link Normalization normalization}: Term frequency normalization
 * <blockquote>Any supported DFR normalization listed in
 * {@link DFRSimilarityFactory}</blockquote></li>
 * </ol>
 * <p>
 * Optional settings:
 * <ul>
 * <li>discountOverlaps (bool): Sets
 * {@link IBSimilarity#setDiscountOverlaps(boolean)}</li>
 * </ul>
 * 
 * @lucene.experimental
 */

public class IBSimilarityFactory extends SimilarityFactory {
	private boolean discountOverlaps;
	private Distribution distribution;
	private Lambda lambda;
	private Normalization normalization;

	@Override
	public void init(SolrParams params) {
		super.init(params);
		discountOverlaps = params.getBool("discountOverlaps", true);
		distribution = parseDistribution(params.get("distribution"));
		lambda = parseLambda(params.get("lambda"));
		normalization = DFRSimilarityFactory.parseNormalization(
				params.get("normalization"), params.get("c"), params.get("mu"),
				params.get("z"));
	}

	private Distribution parseDistribution(String expr) {
		if ("LL".equals(expr)) {
			return new DistributionLL();
		} else if ("SPL".equals(expr)) {
			return new DistributionSPL();
		} else {
			throw new RuntimeException("Invalid distribution: " + expr);
		}
	}

	private Lambda parseLambda(String expr) {
		if ("DF".equals(expr)) {
			return new LambdaDF();
		} else if ("TTF".equals(expr)) {
			return new LambdaTTF();
		} else {
			throw new RuntimeException("Invalid lambda: " + expr);
		}
	}

	@Override
	public Similarity getSimilarity() {
		IBSimilarity sim = new IBSimilarity(distribution, lambda, normalization);
		sim.setDiscountOverlaps(discountOverlaps);
		return sim;
	}
}
